package chapter02
/**
 * Word-count demo over pre-aggregated data: each input element is a
 * (sentence, occurrences) pair, and the goal is the total number of times
 * every word occurs.
 *
 * Two approaches are shown and both print the same totals:
 *  1. keep the occurrence count attached to every word and sum it per word;
 *  2. repeat each sentence `occurrences` times and run a plain word count.
 */
object Test38_WordCount1 {

  /** Splits `text` on runs of whitespace and drops empty tokens. */
  private def tokenize(text: String): List[String] =
    text.split("\\s+").filter(_.nonEmpty).toList

  /**
   * Runs both word-count approaches on a fixed sample and prints every
   * intermediate result to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val tupleList = List(("Hello Scala Spark World ", 4),
      ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))

    // Approach 1: split each sentence but keep its occurrence count.
    val tokenized = tupleList.map { case (sentence, count) =>
      (tokenize(sentence), count)
    }
    println(tokenized)

    // Pair every word with the count of the sentence it came from.
    val weightedWords = tokenized.flatMap { case (words, count) =>
      words.map(word => (word, count))
    }
    val pairsByWord = weightedWords.groupBy(_._1)
    println(pairsByWord)

    // Use a strict `map` instead of `mapValues`: on Scala 2.13+ `mapValues`
    // is deprecated and yields a lazy view that prints as
    // "MapView(<not computed>)" rather than the actual counts.
    println(pairsByWord.map { case (word, pairs) => word -> pairs.map(_._2).sum })

    // Approach 2: convert to a simple word count by repeating each sentence
    // as many times as it occurs.
    val expanded = tupleList.map { case (sentence, count) =>
      (sentence + " ") * count
    }
    val expandedTokens = expanded.map(tokenize)
    println(expandedTokens)
    println(expandedTokens.flatten.groupBy(identity).map { case (word, occurrences) =>
      word -> occurrences.size
    })
  }
}
